// Copyright 2019 The TCMalloc Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Unittest for the TCMalloc implementation.
//
// * The test consists of a set of threads.
// * Each thread maintains a set of allocated objects, with
//   a bound on the total amount of data in the set.
// * Each allocated object's contents are generated by
//   hashing the object pointer, and a generation count
//   in the object.  This allows us to easily check for
//   data corruption.
// * At any given step, the thread can do any of the following:
//     a. Allocate an object
//     b. Increment an object's generation count and update
//        its contents.
//     c. Pass the object to another thread
//     d. Free an object
//   Also, at the end of every step, object(s) are freed to maintain
//   the memory upper-bound.

#define _XOPEN_SOURCE 600
#define _GNU_SOURCE 1
#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <random>
#include <string>
#include <thread>  // NOLINT(build/c++11)
#include <type_traits>
#include <utility>
#include <vector>

#include "benchmark/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/casts.h"
#include "absl/base/internal/sysinfo.h"
#include "absl/base/macros.h"
#include "absl/base/optimization.h"
#include "absl/numeric/bits.h"
#include "absl/random/random.h"
#include "absl/strings/numbers.h"
#include "absl/strings/str_format.h"
#include "absl/synchronization/mutex.h"
#include "absl/utility/utility.h"
#include "tcmalloc/internal/declarations.h"
#include "tcmalloc/internal/logging.h"
#include "tcmalloc/internal/parameter_accessors.h"
#include "tcmalloc/malloc_extension.h"
#include "tcmalloc/testing/testutil.h"
#include "tcmalloc/testing/thread_manager.h"

// Windows doesn't define pvalloc and a few other obsolete unix
// functions; nor does it define posix_memalign (which is not obsolete).
#if defined(_WIN32)
#define cfree free
#define valloc malloc
#define pvalloc malloc
static inline int PosixMemalign(void** ptr, size_t align, size_t size) {
  tcmalloc::Crash(tcmalloc::kCrash, __FILE__, __LINE__,
                  "posix_memalign not supported on windows");
}

// OS X defines posix_memalign in some OS versions but not others;
// it's confusing enough to check that it's easiest to just not to test.
#elif defined(__APPLE__)
static inline int PosixMemalign(void** ptr, size_t align, size_t size) {
  tcmalloc::Crash(tcmalloc::kCrash, __FILE__, __LINE__,
                  "posix_memalign not supported on OS X");
}

#else
#define OS_SUPPORTS_MEMALIGN
static inline int PosixMemalign(void** ptr, size_t align, size_t size) {
  return posix_memalign(ptr, align, size);
}

#endif

// Testing parameters
//
// When making aligned allocations, we pick a power of two up to 1 <<
// kLogMaxMemalign.
const int kLogMaxMemalign = 18;

static const int kSizeBits = 8 * sizeof(size_t);
static const size_t kMaxTestSize = ~static_cast<size_t>(0);
static const size_t kMaxSignedSize = ((size_t(1) << (kSizeBits - 1)) - 1);

namespace tcmalloc {
extern ABSL_ATTRIBUTE_WEAK bool want_hpaa();
}  // namespace tcmalloc

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  SetTestResourceLimit();

  return RUN_ALL_TESTS();
}

namespace tcmalloc {
namespace {

TEST(TcmallocTest, EmptyAllocations) {
  // Check that empty allocation works
  void* p1 = ::operator new(0);
  ASSERT_NE(p1, nullptr);
  void* p2 = ::operator new(0);
  ASSERT_NE(p2, nullptr);
  ASSERT_NE(p1, p2);
  ::operator delete(p1);
  ::operator delete(p2);
}

TEST(TcmallocTest, LargeAllocation) {
  // Check that "lots" of memory can be allocated
  constexpr size_t kMB = 1 << 20;
  ::operator delete(::operator new(100 * kMB));
}

TEST(TcmallocTest, Calloc) {
  // Check calloc() with various arguments

  struct TestCase {
    size_t n;
    size_t s;
    bool ok;
  };

  TestCase tests[] = {
      {0, 0, true},
      {0, 1, true},
      {1, 1, true},
      {1 << 10, 0, true},
      {1 << 20, 0, true},
      {0, 1 << 10, true},
      {0, 1 << 20, true},
      {1 << 20, 2, true},
      {2, 1 << 20, true},
      {1000, 1000, true},
      {kMaxTestSize, 2, false},
      {2, kMaxTestSize, false},
      {kMaxTestSize, kMaxTestSize, false},
      {kMaxSignedSize, 3, false},
      {3, kMaxSignedSize, false},
      {kMaxSignedSize, kMaxSignedSize, false},
  };

  for (auto t : tests) {
    SCOPED_TRACE(absl::StrFormat("calloc(%x, %x)", t.n, t.s));

    void* ptr = calloc(t.n, t.s);
    benchmark::DoNotOptimize(ptr);

    EXPECT_EQ(t.ok, ptr != nullptr);
    if (ptr != nullptr) {
      memset(ptr, 0, t.n * t.s);
      benchmark::DoNotOptimize(ptr);
    }

    // This is harmless if p == nullptr.
    free(ptr);
  }
}

TEST(TcmallocTest, Realloc) {
  // Test that realloc doesn't always reallocate and copy memory.

  // When sampling, we always allocate in units of page-size, which makes
  // reallocs of small sizes do extra work (thus, failing these checks).  Since
  // sampling is random, we turn off sampling to make sure that doesn't happen
  // to us here.
  ScopedProfileSamplingRate s(0);  // turn off sampling

  int start_sizes[] = {100, 1000, 10000, 100000};
  int deltas[] = {1, -2, 4, -8, 16, -32, 64, -128};

  for (int s = 0; s < sizeof(start_sizes) / sizeof(*start_sizes); ++s) {
    void* p = malloc(start_sizes[s]);
    // We stash a copy of the pointer p so we can reference it later.  We must
    // work with the return value of p.
    //
    // Even if we successfully determine that realloc's return value is
    // equivalent to its input value, we must use the returned value under
    // penalty of UB.
    const intptr_t orig_ptr = absl::bit_cast<intptr_t>(p);
    benchmark::DoNotOptimize(p);

    ASSERT_NE(p, nullptr);
    // The larger the start-size, the larger the non-reallocing delta.
    for (int d = 0; d < (s + 1) * 2; ++d) {
      p = realloc(p, start_sizes[s] + deltas[d]);
      const intptr_t new_ptr = absl::bit_cast<intptr_t>(p);
      benchmark::DoNotOptimize(p);

      ASSERT_EQ(orig_ptr, new_ptr)
          << ": realloc should not allocate new memory"
          << " (" << start_sizes[s] << " + " << deltas[d] << ")";
    }
    // Test again, but this time reallocing smaller first.
    for (int d = 0; d < s * 2; ++d) {
      p = realloc(p, start_sizes[s] - deltas[d]);
      const intptr_t new_ptr = absl::bit_cast<intptr_t>(p);
      benchmark::DoNotOptimize(p);

      ASSERT_EQ(orig_ptr, new_ptr)
          << ": realloc should not allocate new memory"
          << " (" << start_sizes[s] << " + " << -deltas[d] << ")";
    }
    free(p);
  }
}

TEST(TcmallocTest, MemalignRealloc) {
  constexpr size_t kDummySize = 42;
  char contents[kDummySize];
  memset(contents, 0x11, kDummySize);

  void* xs[2];
  xs[0] = memalign(16, kDummySize);
  ASSERT_EQ(0, posix_memalign(&xs[1], 16, kDummySize));

  for (void* x : xs) {
    memcpy(x, contents, sizeof(contents));

    ASSERT_NE(nullptr, x);
    void* y = realloc(x, 2 * kDummySize);
    // Reallocating memory obtained for memalign or posix_memalign should work.
    EXPECT_EQ(memcmp(contents, y, sizeof(contents)), 0);
    free(y);
  }
}

struct Object {
  Object() : ptr(nullptr), size(0), generation(0) {}

  Object(const Object& o) = delete;
  Object(Object&& o)
      : ptr(absl::exchange(o.ptr, nullptr)),
        size(absl::exchange(o.size, 0)),
        generation(absl::exchange(o.generation, 0)) {}

  Object& operator=(const Object& o) = delete;
  Object& operator=(Object&& o) {
    using std::swap;
    swap(ptr, o.ptr);
    swap(size, o.size);
    swap(generation, o.generation);

    return *this;
  }

  void* ptr;       // Allocated pointer
  int size;        // Allocated size
  int generation;  // Generation counter of object contents
};

struct ABSL_CACHELINE_ALIGNED State {
  Object RemoveRandomObject() {
    size_t index = absl::Uniform<size_t>(rng, 0, owned.size());

    using std::swap;
    swap(owned[index], owned.back());
    Object to_remove = std::move(owned.back());
    owned.pop_back();

    return to_remove;
  }

  // These objects are accessed exclusively by a single thread and do not
  // require locking.
  absl::BitGen rng;
  std::vector<Object> owned;

  // Other threads can pass us objects.
  absl::Mutex ABSL_CACHELINE_ALIGNED lock;
  std::vector<Object> inbound ABSL_GUARDED_BY(lock);
};

class AllocatorHarness {
 public:
  explicit AllocatorHarness(int nthreads)
      : nthreads_(nthreads),
        state_(nthreads),
        bytes_available_(nthreads * kSizePerThread) {}

  ~AllocatorHarness() {
    for (auto& state : state_) {
      for (auto& o : state.owned) {
        sized_delete(o.ptr, o.size);
      }

      absl::MutexLock m(&state.lock);
      for (auto& o : state.inbound) {
        sized_delete(o.ptr, o.size);
      }
    }
  }

  void Run(int thread_id) {
    // Take ownership of inbound objects.
    auto& state = state_[thread_id];

    std::vector<Object> tmp;
    {
      absl::MutexLock m(&state.lock);
      tmp.swap(state.inbound);
    }

    state.owned.reserve(state.owned.size() + tmp.size());
    for (auto& o : tmp) {
      state.owned.push_back(std::move(o));
    }
    tmp.clear();

    const double coin = absl::Uniform(state.rng, 0., 1.);
    if (coin < 0.45) {
      // Allocate
      size_t size = absl::LogUniform<size_t>(state.rng, 1, kMaxTestSize);

      bool success = false;
      {
        absl::MutexLock m(&lock_);
        if (bytes_available_ >= size) {
          bytes_available_ -= size;
          success = true;
        }
      }

      if (success) {
        Object o;
        o.ptr = ::operator new(size);
        o.size = size;

        FillContents(o);

        state.owned.push_back(std::move(o));
        return;
      }

      // Fall through to try deallocating.
    }

    if (state.owned.empty()) {
      return;
    }

    if (coin < 0.9) {
      // Deallocate
      Object to_delete = state.RemoveRandomObject();

      CheckContents(to_delete);

      sized_delete(to_delete.ptr, to_delete.size);

      absl::MutexLock m(&lock_);
      bytes_available_ += to_delete.size;
      return;
    } else if (coin < 0.92) {
      // Update an object.
      size_t index = absl::Uniform(state.rng, 0u, state.owned.size());

      auto& obj = state.owned[index];
      CheckContents(obj);
      obj.generation++;
      FillContents(obj);
    } else {
      // Hand an object to another thread.
      int thread = absl::Uniform(state.rng, 0, nthreads_);

      Object to_transfer = state.RemoveRandomObject();

      auto& remote_state = state_[thread];
      absl::MutexLock m(&remote_state.lock);
      remote_state.inbound.push_back(std::move(to_transfer));
    }
  }

 private:
  // Fill object contents according to ptr/generation
  void FillContents(Object& object) {
    std::mt19937 r(reinterpret_cast<intptr_t>(object.ptr) & 0x7fffffff);
    for (int i = 0; i < object.generation; ++i) {
      absl::Uniform<uint32_t>(r);
    }
    const char c = absl::Uniform<char>(r, CHAR_MIN, CHAR_MAX);
    memset(object.ptr, c, std::min(ABSL_CACHELINE_SIZE, object.size));
    if (object.size > ABSL_CACHELINE_SIZE) {
      memset(static_cast<char*>(object.ptr) + object.size - ABSL_CACHELINE_SIZE,
             c, ABSL_CACHELINE_SIZE);
    }
  }

  // Check object contents
  void CheckContents(const Object& object) {
    std::mt19937 r(reinterpret_cast<intptr_t>(object.ptr) & 0x7fffffff);
    for (int i = 0; i < object.generation; ++i) {
      absl::Uniform<uint32_t>(r);
    }

    // For large objects, we just check a prefix/suffix
    const char expected = absl::Uniform<char>(r, CHAR_MIN, CHAR_MAX);
    const int limit1 = std::min(object.size, ABSL_CACHELINE_SIZE);
    const int start2 = std::max(limit1, object.size - ABSL_CACHELINE_SIZE);
    for (int i = 0; i < limit1; ++i) {
      ASSERT_EQ(static_cast<const char*>(object.ptr)[i], expected);
    }
    for (int i = start2; i < object.size; ++i) {
      ASSERT_EQ(static_cast<const char*>(object.ptr)[i], expected);
    }
  }

  static constexpr size_t kMaxTestSize = 1 << 16;
  static constexpr size_t kSizePerThread = 4 << 20;

  int nthreads_;
  std::vector<State> state_;

  ABSL_CACHELINE_ALIGNED absl::Mutex lock_;
  size_t bytes_available_ ABSL_GUARDED_BY(lock_);
};

TEST(TCMallocTest, Multithreaded) {
  const int kThreads = 10;
  ThreadManager mgr;
  AllocatorHarness harness(kThreads);

  mgr.Start(kThreads, [&](int thread_id) { harness.Run(thread_id); });

  absl::SleepFor(absl::Seconds(5));

  mgr.Stop();
}

TEST(TCMallocTest, HugeThreadCache) {
  // Allocate more than 2^16 objects to trigger an integer overflow of 16-bit
  // counters.
  constexpr int kNum = 70000;
  constexpr size_t kSize = 10;
  std::vector<void*> arr;
  arr.reserve(kNum);

  for (int i = 0; i < kNum; i++) {
    arr.push_back(::operator new(kSize));
  }

  for (int i = 0; i < kNum; i++) {
    ::operator delete(arr[i], kSize);
  }
}

TEST(TCMallocTest, EnormousAllocations) {
  absl::BitGen rand;

  // Check that asking for stuff tiny bit smaller than largest possible
  // size returns NULL.
  for (size_t i = 0; i < 70000; i += absl::Uniform(rand, 1, 20)) {
    size_t size = kMaxTestSize - i;
    // Convince the compiler that size may change, as to suppress
    // optimization/warnings around the size being too large.
    benchmark::DoNotOptimize(size);
    void* p;

    p = malloc(size);
    ASSERT_EQ(nullptr, p);
    EXPECT_EQ(ENOMEM, errno);

    p = ::operator new(size, std::nothrow);
    ASSERT_EQ(nullptr, p);
    p = ::operator new(size, static_cast<std::align_val_t>(16), std::nothrow);
    ASSERT_EQ(nullptr, p);

    size_t alignment = sizeof(p) << absl::Uniform(rand, 1, kLogMaxMemalign);
    ASSERT_NE(0, alignment);
    ASSERT_EQ(0, alignment % sizeof(void*));
    ASSERT_TRUE(absl::has_single_bit(alignment)) << alignment;
    int err = PosixMemalign(&p, alignment, size);
    ASSERT_EQ(ENOMEM, err);
  }

  // Asking for memory sizes near signed/unsigned boundary (kMaxSignedSize)
  // might work or not, depending on the amount of virtual memory.
  for (size_t i = 0; i < 100; i++) {
    void* p;
    p = malloc(kMaxSignedSize + i);
    free(p);
    p = malloc(kMaxSignedSize - i);
    free(p);
  }

  for (size_t i = 0; i < 100; i++) {
    void* p;
    p = ::operator new(kMaxSignedSize + i, std::nothrow);
    ::operator delete(p);
    p = ::operator new(kMaxSignedSize - i, std::nothrow);
    ::operator delete(p);
  }

  // Check that ReleaseMemoryToSystem has no visible effect (aka, does not crash
  // the test):
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
}

static size_t GetUnmappedBytes() {
  absl::optional<size_t> bytes =
      MallocExtension::GetNumericProperty("tcmalloc.pageheap_unmapped_bytes");
  CHECK_CONDITION(bytes.has_value());
  return *bytes;
}

TEST(TCMallocTest, ReleaseMemoryToSystem) {
  // Similarly, the hugepage-aware allocator doesn't agree with PH about
  // where release is called for.
  if (&tcmalloc::want_hpaa == nullptr || tcmalloc::want_hpaa()) {
    return;
  }

  static const int MB = 1048576;
  void* a = ::operator new(MB);
  void* b = ::operator new(MB);
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
  size_t starting_bytes = GetUnmappedBytes();

  // Calling ReleaseMemoryToSystem() a second time shouldn't do anything.
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
  EXPECT_EQ(starting_bytes, GetUnmappedBytes());

  // ReleaseMemoryToSystem shouldn't do anything either.
  MallocExtension::ReleaseMemoryToSystem(MB);
  EXPECT_EQ(starting_bytes, GetUnmappedBytes());

  ::operator delete(a);

  // The span to release should be 1MB.
  MallocExtension::ReleaseMemoryToSystem(MB / 2);
  EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());

  // Should do nothing since the previous call released too much.
  MallocExtension::ReleaseMemoryToSystem(MB / 4);
  EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());

  ::operator delete(b);

  // Use up the extra MB/4 bytes from 'a' and also release 'b'.
  MallocExtension::ReleaseMemoryToSystem(MB / 2);
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());

  // Should do nothing since the previous call released too much.
  MallocExtension::ReleaseMemoryToSystem(MB / 2);
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());

  // Nothing else to release.
  MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());

  a = ::operator new(MB);
  ::operator delete(a);
  EXPECT_EQ(starting_bytes + MB, GetUnmappedBytes());

  // Releasing less than a page should still trigger a release.
  MallocExtension::ReleaseMemoryToSystem(1);
  EXPECT_EQ(starting_bytes + 2 * MB, GetUnmappedBytes());
}

TEST(TCMallocTest, NothrowSizedDelete) {
  struct Foo {
    double a;
  };
  // Foo should correspond to a size class used by new, but not by malloc.
  static_assert(sizeof(Foo) == 8, "Unexpected size for Foo");

  static constexpr int kNum = 100;
  Foo* ptrs[kNum];
  for (int i = 0; i < kNum; i++) {
    ptrs[i] = new (std::nothrow) Foo;
  }
  for (int i = 0; i < kNum; i++) {
    delete ptrs[i];
  }
}

TEST(TCMallocTest, NothrowSizedDeleteArray) {
  struct Foo {
    ~Foo() {}
    double a;
  };
  // Foo should correspond to a size class used by new, but not by malloc,
  // for some sizes k, sizeof(size_t) + sizeof(Foo) * k.  (sizeof(size_t) being
  // the size cookie of the implementation.)
  static_assert(sizeof(Foo) == 8, "Unexpected size for Foo");
  // With a non-trivially destructible type, we expect the compiler to insert a
  // size cookie so it can invoke sized delete[].
  static_assert(!std::is_trivially_destructible<Foo>::value,
                "Foo should not be trivially destructable, for sized delete[]");

  static constexpr int kNum = 100;
  Foo* ptrs[kNum];
  for (int i = 0; i < kNum; i++) {
    ptrs[i] = new (std::nothrow) Foo[i % 10];
  }
  for (int i = 0; i < kNum; i++) {
    delete[] ptrs[i];
  }
}

TEST(TCMallocTest, MallocAlignment) {
  static constexpr int kNum = 100;

  for (int lg = 0; lg < 16; lg++) {
    const size_t sizes[3] = {
        static_cast<size_t>((1 << lg) - 1),
        static_cast<size_t>(1 << lg),
        static_cast<size_t>((1 << lg) + 1),
    };
    void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)];
    int i = 0;
    for (size_t size : sizes) {
      for (int j = 0; j < kNum; i++, j++) {
        ptrs[i] = malloc(size);
        uintptr_t p = reinterpret_cast<uintptr_t>(ptrs[i]);
        ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j;
      }
    }

    for (void* ptr : ptrs) {
      free(ptr);
    }
  }
}

TEST(TCMallocTest, CallocAlignment) {
  static constexpr int kNum = 100;

  for (int lg = 0; lg < 16; lg++) {
    const size_t sizes[3] = {
        static_cast<size_t>((1 << lg) - 1),
        static_cast<size_t>(1 << lg),
        static_cast<size_t>((1 << lg) + 1),
    };
    void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)];
    int i = 0;
    for (size_t size : sizes) {
      for (int j = 0; j < kNum; i++, j++) {
        ptrs[i] = calloc(size, (1 << (j % 5)));
        uintptr_t p = reinterpret_cast<uintptr_t>(ptrs[i]);
        ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j;
      }
    }

    for (void* ptr : ptrs) {
      free(ptr);
    }
  }
}

TEST(TCMallocTest, ReallocAlignment) {
  static constexpr int kNum = 100;

  for (int lg = 0; lg < 16; lg++) {
    const size_t sizes[3] = {
        static_cast<size_t>((1 << lg) - 1),
        static_cast<size_t>(1 << lg),
        static_cast<size_t>((1 << lg) + 1),
    };
    void* ptrs[kNum * ABSL_ARRAYSIZE(sizes)];
    int i = 0;
    for (size_t size : sizes) {
      for (int j = 0; j < kNum; i++, j++) {
        ptrs[i] = malloc(size);
        uintptr_t p = reinterpret_cast<uintptr_t>(ptrs[i]);
        ASSERT_EQ(0, p % alignof(std::max_align_t)) << size << " " << j;

        const size_t new_size = (1 << (kNum % 16)) + (kNum % 3) - 1;
        void* new_ptr = realloc(ptrs[i], new_size);
        if (new_ptr == nullptr) {
          continue;
        }
        ptrs[i] = new_ptr;

        p = reinterpret_cast<uintptr_t>(new_ptr);
        ASSERT_EQ(0, p % alignof(std::max_align_t))
            << size << " -> " << new_size << " " << j;
      }
    }

    for (void* ptr : ptrs) {
      free(ptr);
    }
  }
}

TEST(TCMallocTest, AlignedNew) {
  absl::BitGen rand;

  struct alloc {
    void* ptr;
    size_t size;
    std::align_val_t alignment;
  };

  std::vector<alloc> allocated;
  for (int i = 1; i < 100; ++i) {
    alloc a;
    a.size = absl::LogUniform(rand, 0, 1 << 20);
    a.alignment = static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));

    a.ptr = ::operator new(a.size, a.alignment);
    ASSERT_NE(a.ptr, nullptr);
    ASSERT_EQ(0, reinterpret_cast<uintptr_t>(a.ptr) %
                     static_cast<size_t>(a.alignment));
    allocated.emplace_back(a);
  }
  for (const auto& p : allocated) {
    int choice = absl::Uniform(rand, 0, 3);

    switch (choice) {
      case 0:
        ::operator delete(p.ptr);
        break;
      case 1:
        ::operator delete(p.ptr, p.alignment);
        break;
      case 2:
        ::operator delete(p.ptr, p.size, p.alignment);
        break;
    }
  }
}

TEST(TCMallocTest, AlignedNewArray) {
  absl::BitGen rand;

  struct alloc {
    void* ptr;
    size_t size;
    std::align_val_t alignment;
  };

  std::vector<alloc> allocated;
  for (int i = 1; i < 100; ++i) {
    alloc a;
    a.size = absl::LogUniform(rand, 0, 1 << 20);
    a.alignment = static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));

    a.ptr = ::operator new[](a.size, a.alignment);
    ASSERT_NE(a.ptr, nullptr);
    ASSERT_EQ(0, reinterpret_cast<uintptr_t>(a.ptr) %
                     static_cast<size_t>(a.alignment));
    allocated.emplace_back(a);
  }
  for (const auto& p : allocated) {
    int choice = absl::Uniform(rand, 0, 3);

    switch (choice) {
      case 0:
        ::operator delete[](p.ptr);
        break;
      case 1:
        ::operator delete[](p.ptr, p.alignment);
        break;
      case 2:
        ::operator delete[](p.ptr, p.size, p.alignment);
        break;
    }
  }
}

TEST(TCMallocTest, NothrowAlignedNew) {
  absl::BitGen rand;
  for (int i = 1; i < 100; ++i) {
    size_t size = kMaxTestSize - i;
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    // Convince the compiler that size may change, as to suppress
    // optimization/warnings around the size being too large.
    benchmark::DoNotOptimize(size);
    void* p = ::operator new(size, alignment, std::nothrow);
    ASSERT_EQ(p, nullptr);
  }
  for (int i = 1; i < 100; ++i) {
    size_t size = absl::LogUniform(rand, 0, 1 << 20);
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    void* ptr = ::operator new(size, alignment, std::nothrow);
    ASSERT_NE(ptr, nullptr);
    ASSERT_EQ(
        0, reinterpret_cast<uintptr_t>(ptr) % static_cast<size_t>(alignment));
    ::operator delete(ptr, alignment, std::nothrow);
  }
}

TEST(TCMallocTest, NothrowAlignedNewArray) {
  absl::BitGen rand;
  for (int i = 1; i < 100; ++i) {
    size_t size = kMaxTestSize - i;
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    // Convince the compiler that size may change, as to suppress
    // optimization/warnings around the size being too large.
    benchmark::DoNotOptimize(size);
    void* p = ::operator new[](size, alignment, std::nothrow);
    ASSERT_EQ(p, nullptr);
  }
  for (int i = 1; i < 100; ++i) {
    size_t size = absl::LogUniform(rand, 0, 1 << 20);
    std::align_val_t alignment =
        static_cast<std::align_val_t>(1 << absl::Uniform(rand, 0, 6));
    void* ptr = ::operator new[](size, alignment, std::nothrow);
    ASSERT_NE(ptr, nullptr);
    ASSERT_EQ(
        0, reinterpret_cast<uintptr_t>(ptr) % static_cast<size_t>(alignment));
    ::operator delete[](ptr, alignment, std::nothrow);
  }
}

void CheckSizedDelete() {
  absl::BitGen rand;

  std::vector<std::pair<void*, size_t>> allocated;
  for (int i = 1; i < 100; ++i) {
    size_t alloc_size = absl::LogUniform<int32_t>(rand, 0, (1 << 20) - 1);
    void* p1 = ::operator new(alloc_size);
    ASSERT_NE(p1, nullptr);
    allocated.push_back(std::make_pair(p1, alloc_size));
  }
  for (std::vector<std::pair<void*, size_t>>::const_iterator i =
           allocated.begin();
       i != allocated.end(); ++i) {
    ::operator delete(i->first, i->second);
  }
}

TEST(TCMallocTest, SizedDelete) { CheckSizedDelete(); }

TEST(TCMallocTest, SizedDeleteSampled) {
  ScopedProfileSamplingRate s(1);  // Try to sample more.
  CheckSizedDelete();
}

// Check sampled allocations return the proper size.
TEST(TCMallocTest, SampleAllocatedSize) {
  ScopedProfileSamplingRate s(1);  // Try to sample more.

  // Do 64 megabytes of allocation; this should (nearly) guarantee we
  // get a sample.
  for (int i = 0; i < 1024 * 1024; ++i) {
    void* ptr = malloc(64);
    ASSERT_EQ(64, MallocExtension::GetAllocatedSize(ptr));
    free(ptr);
  }
}

// Ensure that nallocx works before main.
struct GlobalNallocx {
  GlobalNallocx() { CHECK_CONDITION(nallocx(99, 0) >= 99); }
} global_nallocx;

#ifdef __GNUC__
// 101 is the max user priority.
static void check_global_nallocx() __attribute__((constructor(101)));
static void check_global_nallocx() { CHECK_CONDITION(nallocx(99, 0) >= 99); }
#endif

TEST(TCMallocTest, nallocx) {
  // Guarded allocations may have a smaller allocated size than nallocx
  // predicts.  So we disable guarded allocations.
  ScopedGuardedSamplingRate gs(-1);

  for (size_t size = 0; size <= (1 << 20); size += 7) {
    size_t rounded = nallocx(size, 0);
    ASSERT_GE(rounded, size);
    void* ptr = operator new(size);
    ASSERT_EQ(rounded, MallocExtension::GetAllocatedSize(ptr));
    operator delete(ptr);
  }
}

TEST(TCMallocTest, nallocx_alignment) {
  // Guarded allocations may have a smaller allocated size than nallocx
  // predicts.  So we disable guarded allocations.
  ScopedGuardedSamplingRate gs(-1);

  for (size_t size = 0; size <= (1 << 20); size += 7) {
    for (size_t align = 0; align < 10; align++) {
      size_t rounded = nallocx(size, MALLOCX_LG_ALIGN(align));
      ASSERT_GE(rounded, size);
      ASSERT_EQ(rounded % (1 << align), 0);
      void* ptr = memalign(1 << align, size);
      ASSERT_EQ(rounded, MallocExtension::GetAllocatedSize(ptr));
      free(ptr);
    }
  }
}

TEST(TCMallocTest, sdallocx) {
  for (size_t size = 0; size <= 4096; size += 7) {
    void* ptr = malloc(size);
    memset(ptr, 0, size);
    benchmark::DoNotOptimize(ptr);
    sdallocx(ptr, size, 0);
  }
}

TEST(TCMallocTest, sdallocx_alignment) {
  for (size_t size = 0; size <= 4096; size += 7) {
    for (size_t align = 3; align <= 10; align++) {
      const size_t alignment = 1 << align;
      void* ptr;
      int err = PosixMemalign(&ptr, alignment, size);
      ASSERT_EQ(err, 0) << alignment << " " << size;
      ASSERT_EQ(reinterpret_cast<uintptr_t>(ptr) & (alignment - 1), 0);
      memset(ptr, 0, size);
      benchmark::DoNotOptimize(ptr);
      sdallocx(ptr, size, MALLOCX_LG_ALIGN(align));
    }
  }
}

// Parse out a line like:
// <allocator_name>: xxx bytes allocated
// Return xxx as an int, nullopt if it can't be found
absl::optional<int64_t> ParseLowLevelAllocator(absl::string_view allocator_name,
                                               absl::string_view buf) {
  char needlebuf[32];
  int len =
      absl::SNPrintF(needlebuf, sizeof(needlebuf), "\n%s: ", allocator_name);
  CHECK_CONDITION(0 < len && len < sizeof(needlebuf));
  const absl::string_view needle = needlebuf;

  auto pos = buf.find(needle);
  if (pos == absl::string_view::npos) {
    return absl::nullopt;
  }
  // skip over the prefix.  Should now look like " <number> bytes allocated".
  pos += needle.size();
  buf.remove_prefix(pos);

  pos = buf.find_first_not_of(' ');
  if (pos != absl::string_view::npos) {
    buf.remove_prefix(pos);
  }

  pos = buf.find(' ');
  if (pos != absl::string_view::npos) {
    buf.remove_suffix(buf.size() - pos);
  }

  int64_t result;
  if (!absl::SimpleAtoi(buf, &result)) {
    return absl::nullopt;
  }
  return result;
}

// Parse out a line like:
// cpu   3:         1234 bytes (    0.0 MiB) with     3145728 bytes unallocated
// for the bytes value.  Only trick is not mallocing in the process.
int64_t ParseCPUCacheSize(absl::string_view buf, int cpu) {
  char needlebuf[32];
  int len = absl::SNPrintF(needlebuf, sizeof(needlebuf), "\ncpu %3d: ", cpu);
  CHECK_CONDITION(0 < len && len < sizeof(needlebuf));

  const absl::string_view needle = needlebuf;

  auto pos = buf.find(needle);
  if (pos == absl::string_view::npos) {
    return -1;
  }
  // skip over the prefix.  Should now look like "    <number> bytes".
  pos += needle.size();
  buf.remove_prefix(pos);

  pos = buf.find_first_not_of(' ');
  if (pos != absl::string_view::npos) {
    buf.remove_prefix(pos);
  }

  pos = buf.find(' ');
  if (pos != absl::string_view::npos) {
    buf.remove_suffix(buf.size() - pos);
  }

  int64_t result;
  if (!absl::SimpleAtoi(buf, &result)) {
    return -1;
  }
  return result;
}

// Is the program running with tcmalloc's cpu caches? We can't just
// look at the flag value, it might not be supported on this machine
// (or we might be 32-bit.)
bool UsingCPUCaches() {
  std::string stats = MallocExtension::GetStats();
  // ParseCPUCacheSize will return -1 if there are no CPU entries
  // in the malloc stats (i.e. we're not using CPU caches.)
  return 0 <= ParseCPUCacheSize(stats, 0);
}

void GetMallocStats(std::string* buffer) {
  buffer->resize(buffer->capacity());

  CHECK_CONDITION(&TCMalloc_Internal_GetStats != nullptr);
  size_t required = TCMalloc_Internal_GetStats(buffer->data(), buffer->size());
  ASSERT(required <= buffer->size());

  buffer->resize(std::min(required, buffer->size()));
}

TEST(TCMallocTest, ReclaimWorks) {
  if (!UsingCPUCaches()) {
    return;
  }

  std::string before, after;
  // Allocate strings, so that they (probably) don't need to be reallocated
  // below, and so don't perturb what we're trying to measure.
  before.reserve(1 << 18);
  after.reserve(1 << 18);

  // Generate some traffic to fill up caches.
  const int kThreads = 10;
  ThreadManager mgr;
  AllocatorHarness harness(kThreads);

  mgr.Start(kThreads, [&](int thread_id) { harness.Run(thread_id); });

  absl::SleepFor(absl::Seconds(2));

  mgr.Stop();

  // All CPUs (that we have access to...) should have full percpu caches.
  // Pick one.
  GetMallocStats(&before);
  int cpu = -1;
  ssize_t used_bytes = -1;
  for (int i = 0, num_cpus = absl::base_internal::NumCPUs(); i < num_cpus;
       ++i) {
    used_bytes = ParseCPUCacheSize(before, i);
    if (used_bytes > 0) {
      cpu = i;
      break;
    } else if (used_bytes < -1) {
      // this is the only way we can find out --per_cpu was requested, but
      // not available: no matching line in Stats.
      return;
    }
  }
  // should find at least one non-empty cpu...
  ASSERT_NE(-1, cpu);
  uint64_t released_bytes = MallocExtension::ReleaseCpuMemory(cpu);
  GetMallocStats(&after);
  // I suppose some background thread could malloc here, but I'm not too
  // worried.
  EXPECT_EQ(released_bytes, used_bytes);
  EXPECT_EQ(0, ParseCPUCacheSize(after, cpu));
}

TEST(TCMallocTest, ReclaimStable) {
  // make sure that reclamation under heavy load doesn't lead to
  // corruption.
  struct Reclaimer {
    static void Go(std::atomic<bool>* sync) {
      size_t bytes = 0;
      int iter = 0;
      while (!sync->load(std::memory_order_acquire)) {
        iter++;
        for (int i = 0, num_cpus = absl::base_internal::NumCPUs(); i < num_cpus;
             ++i) {
          bytes += MallocExtension::ReleaseCpuMemory(i);
        }
      }
    }
  };

  std::atomic<bool> sync{false};
  std::thread releaser(Reclaimer::Go, &sync);

  const int kThreads = 10;
  ThreadManager mgr;
  AllocatorHarness harness(kThreads);

  mgr.Start(kThreads, [&](int thread_id) { harness.Run(thread_id); });

  absl::SleepFor(absl::Seconds(5));

  mgr.Stop();

  sync.store(true, std::memory_order_release);
  releaser.join();
}

TEST(TCMallocTest, GetStatsReportsLowLevel) {
  std::string stats = MallocExtension::GetStats();
  fprintf(stderr, "%s\n", stats.c_str());

  absl::optional<int64_t> low_level_bytes =
      ParseLowLevelAllocator("MmapSysAllocator", stats);
  ASSERT_THAT(low_level_bytes, testing::Ne(absl::nullopt));
  EXPECT_GT(*low_level_bytes, 0);
  size_t heap_size =
      *MallocExtension::GetNumericProperty("generic.current_allocated_bytes");

  // sanity check: we must have allocated as many bytes as in the heap
  EXPECT_GE(*low_level_bytes, heap_size);
}

#if defined(__GLIBC__) && defined(__GNUC__) && !defined(__MACH__)
namespace {
template <typename T1, typename T2>
void ExpectSameAddresses(T1 v1, T2 v2) {
  // C++ language requires a constant folding on constant inputs,
  // which may result to returning false for two aliased function,
  // because the aliasing is not known at this compilation unit.
  // Use volatile here to enforce a runtime comparison.
  volatile auto p1 = reinterpret_cast<void (*)()>(v1);
  volatile auto p2 = reinterpret_cast<void (*)()>(v2);
  const bool result = p1 == p2;
  // EXPECT_EQ seems not be able to handle volatiles.
  EXPECT_TRUE(result);
}
}  // end unnamed namespace

TEST(TCMallocTest, TestAliasedFunctions) {
  void* (*operator_new)(size_t) = &::operator new;
  void* (*operator_new_nothrow)(size_t, const std::nothrow_t&) =
      &::operator new;
  void* (*operator_new_array)(size_t) = &::operator new[];
  void* (*operator_new_array_nothrow)(size_t, const std::nothrow_t&) =
      &::operator new[];

  ExpectSameAddresses(operator_new, operator_new_array);
  ExpectSameAddresses(operator_new_nothrow, operator_new_array_nothrow);

  void (*operator_delete)(void*) = &::operator delete;
  void (*operator_delete_nothrow)(void*, const std::nothrow_t&) =
      &::operator delete;
  void (*operator_delete_array)(void*) = &::operator delete[];
  void (*operator_delete_array_nothrow)(void*, const std::nothrow_t&) =
      &::operator delete[];

  ExpectSameAddresses(&::free, operator_delete);
  ExpectSameAddresses(&::free, operator_delete_nothrow);
  ExpectSameAddresses(&::free, operator_delete_array);
  ExpectSameAddresses(&::free, operator_delete_array_nothrow);
}

#endif

TEST(TcmallocSizedNewTest, SizedOperatorNewReturnsExtraCapacity) {
  // For release / no sanitizer builds, tcmalloc does return
  // the next available class size, which we know is always at least
  // properly aligned, so size 3 should always return extra capacity.
  sized_ptr_t res = tcmalloc_size_returning_operator_new(3);
  EXPECT_THAT(res.n, testing::Ge(8));
  ::operator delete(res.p);
}

TEST(TcmallocSizedNewTest, NothrowSizedOperatorNewReturnsExtraCapacity) {
  // For release / no sanitizer builds, tcmalloc does return
  // the next available class size, which we know is always at least
  // properly aligned, so size 3 should always return extra capacity.
  sized_ptr_t res = tcmalloc_size_returning_operator_new_nothrow(3);
  EXPECT_THAT(res.n, testing::Ge(8));
  ::operator delete(res.p);
}

TEST(TcmallocSizedNewTest, SizedOperatorNew) {
  for (size_t size = 0; size < 1024; ++size) {
    sized_ptr_t res = tcmalloc_size_returning_operator_new(size);
    EXPECT_NE(res.p, nullptr);
    EXPECT_GE(res.n, size);
    EXPECT_LE(size, std::max(size + 100, 2 * size));
    benchmark::DoNotOptimize(memset(res.p, 0xBF, res.n));
    ::operator delete(res.p);
  }
}

TEST(TcmallocSizedNewTest, NothrowSizedOperatorNew) {
  for (size_t size = 0; size < 64 * 1024; ++size) {
    sized_ptr_t res = tcmalloc_size_returning_operator_new_nothrow(size);
    EXPECT_NE(res.p, nullptr);
    EXPECT_GE(res.n, size);
    EXPECT_LE(size, std::max(size + 100, 2 * size));
    benchmark::DoNotOptimize(memset(res.p, 0xBF, res.n));
    ::operator delete(res.p);
  }
}

TEST(TcmallocSizedNewTest, InvalidSizedOperatorNewAlwaysFails) {
  constexpr size_t kBadSize = std::numeric_limits<size_t>::max();
  EXPECT_DEATH(tcmalloc_size_returning_operator_new(kBadSize), ".*");
}

TEST(TcmallocSizedNewTest, InvalidNothrowSizedOperatorNew) {
  constexpr size_t kBadSize = std::numeric_limits<size_t>::max();
  sized_ptr_t res = tcmalloc_size_returning_operator_new_nothrow(kBadSize);
  EXPECT_EQ(res.p, nullptr);
  EXPECT_EQ(res.n, 0);
}

TEST(TcmallocSizedNewTest, SizedOperatorNewMatchesMallocExtensionValue) {
  // Set reasonable sampling and guarded sampling probabilities.
  ScopedProfileSamplingRate s(20);
  ScopedGuardedSamplingRate gs(20);
  constexpr size_t kOddIncrement = 117;

  // Traverse clean power 2 / common size class / page sizes
  for (size_t size = 32; size <= 2 * 1024 * 1024; size *= 2) {
    sized_ptr_t r = tcmalloc_size_returning_operator_new(size);
    ASSERT_EQ(r.n, MallocExtension::GetAllocatedSize(r.p));
    ::operator delete(r.p, r.n);
  }

  // Traverse randomized sizes
  for (size_t size = 32; size <= 2 * 1024 * 1024; size += kOddIncrement) {
    sized_ptr_t r = tcmalloc_size_returning_operator_new(size);
    ASSERT_EQ(r.n, MallocExtension::GetAllocatedSize(r.p));
    ::operator delete(r.p, r.n);
  }
}

TEST(SizedDeleteTest, SizedOperatorDelete) {
  enum DeleteSize { kSize, kCapacity, kHalfway };
  for (size_t size = 0; size < 64 * 1024; ++size) {
    for (auto delete_size : {kSize, kCapacity, kHalfway}) {
      sized_ptr_t res = tcmalloc_size_returning_operator_new(size);
      switch (delete_size) {
        case kSize:
          ::operator delete(res.p, size);
          break;
        case kCapacity:
          ::operator delete(res.p, res.n);
          break;
        case kHalfway:
          ::operator delete(res.p, (size + res.n) / 2);
          break;
      }
    }
  }
}

TEST(SizedDeleteTest, NothrowSizedOperatorDelete) {
  for (size_t size = 0; size < 64 * 1024; ++size) {
    sized_ptr_t res = tcmalloc_size_returning_operator_new(size);
    ::operator delete(res.p, std::nothrow);
  }
}

}  // namespace
}  // namespace tcmalloc
